package happy.research.data;
import happy.coding.io.FileIO;
import happy.coding.io.Strings;
import happy.coding.io.net.URLReader;
import happy.coding.system.Debug;
import happy.coding.system.Systems;
import java.io.File;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
public class MTimeCrawler
{
String dir = Systems.getDesktop() + "mtime.com/";
public MTimeCrawler()
{
FileIO.makeDirectory(dir);
}
public void crawl_web_pages() throws Exception
{
String filePath = "./src/main/resources/mtime.txt";
List<String> urls = FileIO.readAsList(filePath);
for (String url : urls)
{
String html = URLReader.read(url);
Document doc = Jsoup.parse(html);
String name = doc.select("span[property=v:itemreviewed]").text();
name = Strings.filterWebString(name, '_');
String dirPath = dir + name + "/";
FileIO.makeDirectory(dirPath);
FileIO.writeString(dirPath + name + ".html", html);
}
}
public void parse_overall_ratings() throws Exception
{
File directory = new File(dir);
File[] dirs = directory.listFiles();
for (File d : dirs)
{
String fname = d.getName();
String file = d.getPath() + "/" + fname + ".html";
Document doc = Jsoup.parse(FileIO.readAsString(file));
MTimeMovie mm = new MTimeMovie();
String name = doc.select("span[property=v:itemreviewed]").text();
mm.setName(name);
String year = doc.select("a.c_666").first().text();
mm.setYear(Integer.parseInt(year));
}
}
public static void main(String[] args) throws Exception
{
MTimeCrawler m = new MTimeCrawler();
if (Debug.OFF) m.crawl_web_pages();
if (Debug.OFF) m.parse_overall_ratings();
}
}